## Analysis of wave-2 survey data from MTurk
## Ariel White (for linked fate project with Claudine Gay, Jennifer Hochschild)
## July 2015

## this file is the same as the code used to produce all MTurk results discussed in the paper, 
## except that we've modified the original dataset to remove identifiers (MTurk worker IDs, IP addresses).

##this is the code we ran to drop identifying data:
#rm(list=ls())
#setwd("/home/ariel/Dropbox/linked fate/secondrounddata/wave2") 
#library(foreign)
#wave2 <- read.csv("LinkedFateMasterSurvey.csv", skip=1, stringsAsFactors=F)
#setwd("/home/ariel/Dropbox/linked fate/replicationdata/mturkdata")
#wave2_deid <- wave2
#wave2_deid$ResponseID <- NA
#wave2_deid$IPAddress <- NA
#wave2_deid$StartDate <- NA
#wave2_deid$EndDate <- NA
#wave2_deid$mturkcode <- NA
#wave2_deid$mturkid <- NA
#write.csv(wave2_deid, "LinkedFateMasterSurvey_deidentified.csv", row.names=F)

# Start from an empty workspace and point R at the folder that holds the
# de-identified replication data.
rm(list = ls())
setwd("/home/ariel/Dropbox/linked fate/replicationdata/mturkdata") # change this filepath to your filepath.
library(foreign)
# Text answers stay character strings; the recoding further down compares
# them against literal answer labels.
wave2 <- read.csv(
  "LinkedFateMasterSurvey_deidentified.csv",
  stringsAsFactors = FALSE
)

#basic idea is to see whether any of these other measures (of different concepts) are closely related to responses to the linked fate items-- to try to see if we might be measuring something different with that question than we think we're measuring. 
#So we basically want to look at interitem correlations.  Also, check to see if original findings replicate in this sample.

#1.) first, do we still see high correlations between the different LF questions? (Look at this both with 1-4 scale and a binary measure from the first question: yes/no).
#2.) and then we can also look at the politicization questions: do LF scores still not predict things like voter turnout?
#3.) then, do we see high correlations between LF (race?) scores (1-4 scale) and:
#- descriptive connectedness (this is 6 questions on a 7-pt likert scale.  We could just average them?  and check cronbach's alpha?)
#- descriptive locus of control (again, 6 questions on a 7-pt likert scale.)
#- normative locus of control (3 questions on a 7-pt likert scale)
#- normative connectedness (this is the one based on the identification with all humanity scale, so we ended up with 7 different items, and we asked each of them for 3 groups-- Americans,the world, local community-- on a 5-point scale.)
    
# Start off by setting up some scales; first list the available columns.
names(wave2)

# Drop the one respondent who inexplicably has no race coded (blank string).
wave2 <- wave2[wave2$race != "", ]

# The group-specific linked-fate (LF) items are combined into one measure per
# identity. Gender: the yes/no answer is taken from whichever of the "women"
# or "men" items carries a response.
wave2$lf_gender1 <- NA
wave2[
  wave2$Do.you.think.that.what.happens.to.women.in.this.country.will.have...something.to.do.with.what.happe... == "Yes" |
    wave2$Do.you.think.that.what.happens.to.men.in.this.country.will.have...something.to.do.with.what.happens... == "Yes",
  "lf_gender1"
] <- 1
wave2[
  wave2$Do.you.think.that.what.happens.to.women.in.this.country.will.have...something.to.do.with.what.happe... == "No" |
    wave2$Do.you.think.that.what.happens.to.men.in.this.country.will.have...something.to.do.with.what.happens... == "No",
  "lf_gender1"
] <- 0
summary(wave2$lf_gender1)

# 0-3 scale: 0 = answered "No"; 1-3 = strength reported in the follow-up item.
wave2$lf_gender_scale <- NA
wave2[!is.na(wave2$lf_gender1) & wave2$lf_gender1 == 0, "lf_gender_scale"] <- 0
wave2[
  wave2$How.much.will.it.affect.you..1 == "Not very much at all",
  "lf_gender_scale"
] <- 1
wave2[wave2$How.much.will.it.affect.you..1 == "Some", "lf_gender_scale"] <- 2
wave2[wave2$How.much.will.it.affect.you..1 == "A lot", "lf_gender_scale"] <- 3
summary(wave2$lf_gender_scale)


# Race: binary LF answer (lf_race1) plus the 0-3 scale (lf_race_scale).
wave2$lf_race1 <- NA
wave2$lf_race_scale <- NA
lfracevars1 <- 23:27 # column positions of the yes/no items being combined

# Walk the item columns in order; a later column overwrites an earlier one.
# Cells that are NA are excluded so the logical row index never contains NA.
for (i in seq_along(lfracevars1)) {
  wave2[
    !is.na(wave2[, lfracevars1[i]]) & wave2[, lfracevars1[i]] == "Yes",
    "lf_race1"
  ] <- 1
  wave2[
    !is.na(wave2[, lfracevars1[i]]) & wave2[, lfracevars1[i]] == "No",
    "lf_race1"
  ] <- 0
}
summary(wave2$lf_race1)

# 0 = answered "No"; 1-3 = strength reported in the follow-up item.
wave2[!is.na(wave2$lf_race1) & wave2$lf_race1 == 0, "lf_race_scale"] <- 0
wave2[
  wave2$How.much.will.it.affect.you. == "Not very much at all",
  "lf_race_scale"
] <- 1
wave2[wave2$How.much.will.it.affect.you. == "Some", "lf_race_scale"] <- 2
wave2[wave2$How.much.will.it.affect.you. == "A lot", "lf_race_scale"] <- 3
summary(wave2$lf_race_scale)

# Religion: binary LF answer (lf_religion1) plus the 0-3 scale.
wave2$lf_religion1 <- NA
wave2$lf_religion_scale <- NA
lfreligionvars1 <- 32:44 # column positions of the yes/no items being combined

# Same procedure as for the other identities: scan every item column, skip NA
# cells, and let later columns overwrite earlier ones.
for (i in seq_along(lfreligionvars1)) {
  wave2[
    !is.na(wave2[, lfreligionvars1[i]]) & wave2[, lfreligionvars1[i]] == "Yes",
    "lf_religion1"
  ] <- 1
  wave2[
    !is.na(wave2[, lfreligionvars1[i]]) & wave2[, lfreligionvars1[i]] == "No",
    "lf_religion1"
  ] <- 0
}
summary(wave2$lf_religion1)

# 0 = answered "No"; 1-3 = strength reported in the follow-up item.
wave2[
  !is.na(wave2$lf_religion1) & wave2$lf_religion1 == 0,
  "lf_religion_scale"
] <- 0
wave2[
  wave2$How.much.will.it.affect.you..2 == "Not very much at all",
  "lf_religion_scale"
] <- 1
wave2[wave2$How.much.will.it.affect.you..2 == "Some", "lf_religion_scale"] <- 2
wave2[wave2$How.much.will.it.affect.you..2 == "A lot", "lf_religion_scale"] <- 3
summary(wave2$lf_religion_scale)


# Class: binary LF answer (lf_class1) plus the 0-3 scale (lf_class_scale).
wave2$lf_class1 <- NA
wave2$lf_class_scale <- NA
lfclassvars1 <- 46:49 # column positions of the yes/no items being combined

# Scan every item column; later columns overwrite earlier ones.
# NA cells are excluded explicitly, as in the race and religion loops: a
# logical row index containing NA makes a data-frame assignment fail with
# "missing values are not allowed in subscripted assignments".
for (i in seq_along(lfclassvars1)) {
  answer <- wave2[, lfclassvars1[i]]
  wave2[!is.na(answer) & answer == "Yes", "lf_class1"] <- 1
  wave2[!is.na(answer) & answer == "No", "lf_class1"] <- 0
}
summary(wave2$lf_class1)

# 0 = answered "No"; 1-3 = strength reported in the follow-up item.
wave2[!is.na(wave2$lf_class1) & wave2$lf_class1 == 0, "lf_class_scale"] <- 0
wave2[
  wave2$How.much.will.it.affect.you..3 == "Not very much at all",
  "lf_class_scale"
] <- 1
wave2[wave2$How.much.will.it.affect.you..3 == "Some", "lf_class_scale"] <- 2
wave2[wave2$How.much.will.it.affect.you..3 == "A lot", "lf_class_scale"] <- 3
summary(wave2$lf_class_scale)

# Clean up partisanship.

# party3: 1 = Republican, 2 = Independent or no preference, 3 = Democrat.
wave2$party3 <- NA
wave2[wave2[, 67] == "Republican", "party3"] <- 1
wave2[wave2[, 67] == "Democrat", "party3"] <- 3
wave2[wave2[, 67] == "Independent", "party3"] <- 2
wave2[wave2[, 67] == "No preference", "party3"] <- 2 # or should these be dropped?

# party7: 1 = strong Republican ... 7 = strong Democrat.
# Column 70 holds the strength answer used for the partisans.
wave2$party7 <- NA
wave2[wave2$party3 == 1 & wave2[, 70] == "Strong", "party7"] <- 1 # strong republican
wave2[wave2$party3 == 1 & wave2[, 70] == "Not very strong", "party7"] <- 2 # weak republican
wave2[wave2$party3 == 3 & wave2[, 70] == "Strong", "party7"] <- 7 # strong dem
wave2[wave2$party3 == 3 & wave2[, 70] == "Not very strong", "party7"] <- 6 # weak dem

# Column 69 holds the party that respondents with party3 == 2 lean towards.
wave2[
  !is.na(wave2$party3) & wave2$party3 == 2 &
    !is.na(wave2[, 69]) & wave2[, 69] == "Democratic",
  "party7"
] <- 5 # lean dem
wave2[
  !is.na(wave2$party3) & wave2$party3 == 2 &
    !is.na(wave2[, 69]) & wave2[, 69] == "Republican",
  "party7"
] <- 3 # lean rep
# Anyone with party3 == 2 who is still unassigned becomes a pure independent
# (original note: "wait, I guess we didn't leave this as an option?").
wave2[
  is.na(wave2$party7) & !is.na(wave2$party3) & wave2$party3 == 2,
  "party7"
] <- 4
summary(wave2$party7)

# Political action: four 0/1 indicators built from the Yes/No survey answers.
# Any other answer stays NA.

# Currently registered to vote.
wave2$pa_registered <- NA
wave2[
  wave2$Are.you.currently.registered.to.vote. == "Yes",
  "pa_registered"
] <- 1
wave2[
  wave2$Are.you.currently.registered.to.vote. == "No",
  "pa_registered"
] <- 0
summary(wave2$pa_registered) # 85%

# Voted.
wave2$pa_voted <- NA
wave2[
  wave2$In.talking.to.people.about.elections..we.often.find.that.a.lot.of...people.were.not.able.to.vote.be... == "Yes, I voted.",
  "pa_voted"
] <- 1
wave2[
  wave2$In.talking.to.people.about.elections..we.often.find.that.a.lot.of...people.were.not.able.to.vote.be... == "No, I did not vote.",
  "pa_voted"
] <- 0
summary(wave2$pa_voted) # 68%

# Attended a political protest or rally.
wave2$pa_protest <- NA
wave2[
  wave2$In.the.last.12.months..have.you.attended.a.political.protest.or...rally. == "Yes",
  "pa_protest"
] <- 1
wave2[
  wave2$In.the.last.12.months..have.you.attended.a.political.protest.or...rally. == "No",
  "pa_protest"
] <- 0
summary(wave2$pa_protest) # 7%

# Worked with others or joined an organization.
wave2$pa_organization <- NA
wave2[
  wave2$In.the.last.twelve.months..have.you.worked.with.others.or.joined.an...organization.in.your.communit... == "Yes",
  "pa_organization"
] <- 1
wave2[
  wave2$In.the.last.twelve.months..have.you.worked.with.others.or.joined.an...organization.in.your.communit... == "No",
  "pa_organization"
] <- 0
summary(wave2$pa_organization) # 19%

# look at attention ck
# Tabulates the raw answers to the attention-check item so the share of
# respondents giving each answer can be inspected.
table(wave2$Many.people.get.their.news.and.entertainment.from.a.wide.mix.of...sources..while.others.use.just.a...)
#almost everyone passes.  wonder if this means it's too similar to existing ones?

########################################################################
## then: 1.) look at LF item correlations
# Each subset keeps the respondents with a non-missing score on that scale.
# These three objects are reused later in the script for their row counts.

gender <- wave2[!is.na(wave2$lf_gender_scale), ]
cor(gender$lf_race_scale, gender$lf_gender_scale) # .54
# perhaps a bit lower than before b/c separated by so many questions?

religion <- wave2[!is.na(wave2$lf_religion_scale), ]
cor(religion$lf_race_scale, religion$lf_religion_scale) # .43 ## UPDATE: .37

class <- wave2[!is.na(wave2$lf_class_scale), ]
cor(class$lf_race_scale, class$lf_class_scale) # .4


########################################################################
## 2.) look at politicization
# Does racial LF predict political behavior? Simple correlations first, then
# linear models. Within each pair the second (covariate-adjusted) fit
# overwrites the first, so the adjusted model is the one left in the workspace.
cor(wave2$pa_voted, wave2$lf_race_scale, use = "pairwise.complete.obs")
cor(wave2$pa_registered, wave2$lf_race_scale, use = "pairwise.complete.obs")

# Registration
race_reg <- lm(pa_registered ~ lf_race_scale, data = wave2)
summary(race_reg)
race_reg <- lm(pa_registered ~ lf_race_scale + gender + race, data = wave2)
summary(race_reg)

# Voting
race_voted <- lm(pa_voted ~ lf_race_scale, data = wave2)
summary(race_voted)
race_voted <- lm(pa_voted ~ lf_race_scale + gender + race, data = wave2)
summary(race_voted)

# Protest
race_protest <- lm(pa_protest ~ lf_race_scale, data = wave2)
summary(race_protest)
race_protest <- lm(pa_protest ~ lf_race_scale + gender + race, data = wave2)
summary(race_protest)

# Organization membership
race_organization <- lm(pa_organization ~ lf_race_scale, data = wave2)
summary(race_organization)
race_organization <- lm(pa_organization ~ lf_race_scale + gender + race, data = wave2)
summary(race_organization)

# 7-point party identification
race_party <- lm(party7 ~ lf_race_scale, data = wave2)
summary(race_party)
race_party <- lm(party7 ~ lf_race_scale + gender + race, data = wave2)
summary(race_party)

# Same models, restricted to Black respondents. Here the bivariate fit (suffix
# "b") and the covariate-adjusted fit (suffix "b1") are both kept.
black <- wave2[wave2$race == "Black, Non-Hispanic", ]

# Registration
race_regb <- lm(pa_registered ~ lf_race_scale, data = black)
summary(race_regb)
race_regb1 <- lm(pa_registered ~ lf_race_scale + gender + class, data = black)
summary(race_regb1)
# hm.

# Voting
race_votedb <- lm(pa_voted ~ lf_race_scale, data = black)
summary(race_votedb)
race_votedb1 <- lm(pa_voted ~ lf_race_scale + gender + class, data = black)
summary(race_votedb1)

# Protest
race_protestb <- lm(pa_protest ~ lf_race_scale, data = black)
summary(race_protestb)
race_protestb1 <- lm(pa_protest ~ lf_race_scale + gender + class, data = black)
summary(race_protestb1)
# hmm

# Organization membership
race_organizationb <- lm(pa_organization ~ lf_race_scale, data = black)
summary(race_organizationb)
race_organizationb1 <- lm(pa_organization ~ lf_race_scale + gender + class, data = black)
summary(race_organizationb1)

# 7-point party identification
race_partyb <- lm(party7 ~ lf_race_scale, data = black)
summary(race_partyb)
race_partyb1 <- lm(party7 ~ lf_race_scale + gender + class, data = black)
summary(race_partyb1)

library(stargazer)

# LaTeX table of the covariate-adjusted models for Black respondents.
# NOTE(review): this table and the next one are written with the same LaTeX
# label ("racialLFpol_b"); confirm they are never \input into the same document.
stargazer(
  race_regb1, race_votedb1, race_protestb1, race_organizationb1, race_partyb1,
  title = "Predicting Political Behavior with Black Racial Linked Fate (MTurk Sample)",
  label = "racialLFpol_b",
  dep.var.labels = c("Registration", "Voting", "Protest", "Org. Membership", "Party (7-pt.)"),
  covariate.labels = c("Linked Fate", "Male", "Middle Class", "Upper Class", "Working Class"),
  float.env = "sidewaystable",
  out = "LFrace_politicization_black1.tex"
)

# LaTeX table of the bivariate models for Black respondents.
stargazer(
  race_regb, race_votedb, race_protestb, race_organizationb, race_partyb,
  title = "Predicting Political Behavior with Black Racial Linked Fate (MTurk Sample)",
  label = "racialLFpol_b",
  dep.var.labels = c("Registration", "Voting", "Protest", "Org. Membership", "Party (7-pt.)"),
  covariate.labels = c("Linked Fate"),
  float.env = "sidewaystable",
  out = "LFrace_politicization_black.tex"
)

# Same deal for class LF: a bivariate fit followed by a covariate-adjusted fit
# that overwrites it.

# Registration
class_reg <- lm(pa_registered ~ lf_class_scale, data = wave2)
summary(class_reg) # maybe?
class_reg <- lm(pa_registered ~ lf_class_scale + gender + race, data = wave2)
summary(class_reg) # eh

# Voting
class_voted <- lm(pa_voted ~ lf_class_scale, data = wave2)
summary(class_voted)
class_voted <- lm(pa_voted ~ lf_class_scale + gender + race, data = wave2)
summary(class_voted)

# Protest
class_protest <- lm(pa_protest ~ lf_class_scale, data = wave2)
summary(class_protest)
class_protest <- lm(pa_protest ~ lf_class_scale + gender + race, data = wave2)
summary(class_protest)

# Organization membership
class_organization <- lm(pa_organization ~ lf_class_scale, data = wave2)
summary(class_organization)
class_organization <- lm(pa_organization ~ lf_class_scale + gender + race, data = wave2)
summary(class_organization)

# 7-point party identification
class_party <- lm(party7 ~ lf_class_scale, data = wave2)
summary(class_party)
class_party <- lm(party7 ~ lf_class_scale + gender + race, data = wave2)
summary(class_party)

## And gender LF: a bivariate fit followed by a covariate-adjusted fit that
## overwrites it.

# Registration
gender_reg <- lm(pa_registered ~ lf_gender_scale, data = wave2)
summary(gender_reg)
gender_reg <- lm(pa_registered ~ lf_gender_scale + gender + race, data = wave2)
summary(gender_reg)

# Voting
gender_voted <- lm(pa_voted ~ lf_gender_scale, data = wave2)
summary(gender_voted)
gender_voted <- lm(pa_voted ~ lf_gender_scale + gender + race, data = wave2)
summary(gender_voted)

# Protest
gender_protest <- lm(pa_protest ~ lf_gender_scale, data = wave2)
summary(gender_protest)
gender_protest <- lm(pa_protest ~ lf_gender_scale + gender + race, data = wave2)
summary(gender_protest)

# Organization membership
gender_organization <- lm(pa_organization ~ lf_gender_scale, data = wave2)
summary(gender_organization) # maybe maybe?
gender_organization <- lm(pa_organization ~ lf_gender_scale + gender + race, data = wave2)
summary(gender_organization)

# 7-point party identification
gender_party <- lm(party7 ~ lf_gender_scale, data = wave2)
summary(gender_party)
gender_party <- lm(party7 ~ lf_gender_scale + gender + race, data = wave2)
summary(gender_party)

## And religion LF: a bivariate fit followed by a covariate-adjusted fit that
## overwrites it.

# Registration
religion_reg <- lm(pa_registered ~ lf_religion_scale, data = wave2)
summary(religion_reg)
religion_reg <- lm(pa_registered ~ lf_religion_scale + gender + race, data = wave2)
summary(religion_reg)

# Voting
religion_voted <- lm(pa_voted ~ lf_religion_scale, data = wave2)
summary(religion_voted)
religion_voted <- lm(pa_voted ~ lf_religion_scale + gender + race, data = wave2)
summary(religion_voted)

# Protest
religion_protest <- lm(pa_protest ~ lf_religion_scale, data = wave2)
summary(religion_protest)
religion_protest <- lm(pa_protest ~ lf_religion_scale + gender + race, data = wave2)
summary(religion_protest)

# Organization membership
religion_organization <- lm(pa_organization ~ lf_religion_scale, data = wave2)
summary(religion_organization) # maybe maybe?
religion_organization <- lm(pa_organization ~ lf_religion_scale + gender + race, data = wave2)
summary(religion_organization)

# 7-point party identification
religion_party <- lm(party7 ~ lf_religion_scale, data = wave2)
summary(religion_party)
religion_party <- lm(party7 ~ lf_religion_scale + gender + race, data = wave2)
summary(religion_party)

# One LaTeX table per LF scale, each holding the five covariate-adjusted
# full-sample models (registration, voting, protest, organization, party).
# NOTE(review): covariate.labels are positional; they presumably follow the
# coefficient order produced by the gender and race variables -- confirm
# against the summary() output.

stargazer(
  race_reg, race_voted, race_protest, race_organization, race_party,
  title = "Predicting Political Behavior with Racial Linked Fate (MTurk Sample)",
  label = "racialLFpol",
  dep.var.labels = c("Registration", "Voting", "Protest", "Org. Membership", "Party (7-pt.)"),
  covariate.labels = c("Linked Fate", "Male", "Black", "Hispanic", "White"),
  float.env = "sidewaystable",
  out = "LFrace_politicization.tex"
)

stargazer(
  class_reg, class_voted, class_protest, class_organization, class_party,
  title = "Predicting Political Behavior with Class Linked Fate (MTurk Sample)",
  label = "classLFpol",
  dep.var.labels = c("Registration", "Voting", "Protest", "Org. Membership", "Party (7-pt.)"),
  covariate.labels = c("Linked Fate", "Male", "Black", "Hispanic", "White"),
  float.env = "sidewaystable",
  out = "LFclass_politicization.tex"
)

stargazer(
  gender_reg, gender_voted, gender_protest, gender_organization, gender_party,
  title = "Predicting Political Behavior with Gender Linked Fate (MTurk Sample)",
  label = "genderLFpol",
  dep.var.labels = c("Registration", "Voting", "Protest", "Org. Membership", "Party (7-pt.)"),
  covariate.labels = c("Linked Fate", "Male", "Black", "Hispanic", "White"),
  float.env = "sidewaystable",
  out = "LFgender_politicization.tex"
)

stargazer(
  religion_reg, religion_voted, religion_protest, religion_organization, religion_party,
  title = "Predicting Political Behavior with Religious Linked Fate (MTurk Sample)",
  label = "religionLFpol",
  dep.var.labels = c("Registration", "Voting", "Protest", "Org. Membership", "Party (7-pt.)"),
  covariate.labels = c("Linked Fate", "Male", "Black", "Hispanic", "White"),
  float.env = "sidewaystable",
  out = "LFreligion_politicization.tex"
)


########################################################################
## 3.) now the new stuff: look at how similar LF is to these other questions.
# For each set of questions: clean the items up, look at Cronbach's alpha,
# then compare the averaged scale to LF (race).
library(car)

# Convert one 7-point agree/disagree item to numeric.
#   x       : vector of answer labels ("Strongly Disagree" ... "Strongly Agree").
#   reverse : FALSE codes Strongly Disagree = 1 ... Strongly Agree = 7;
#             TRUE flips the direction (Strongly Disagree = 7 ... = 1).
# Returns a numeric vector; anything that is not one of the seven labels ends
# up NA after as.numeric() (possibly with a coercion warning).
# `as.factor = FALSE` is the argument name used by car >= 3.0; with older car
# releases it partially matches the former `as.factor.result`, so the call
# works under both. The previous spelling `as.factor.result` is rejected as an
# unused argument by current car.
recode_agree7 <- function(x, reverse = FALSE) {
  spec <- if (reverse) {
    "'Strongly Disagree'=7; 'Somewhat Disagree'=6; 'Slightly Disagree'=5;'Neither Agree nor Disagree'=4;'Slightly Agree'=3;'Somewhat Agree'=2;'Strongly Agree'=1;"
  } else {
    "'Strongly Disagree'=1; 'Somewhat Disagree'=2; 'Slightly Disagree'=3;'Neither Agree nor Disagree'=4;'Slightly Agree'=5;'Somewhat Agree'=6;'Strongly Agree'=7;"
  }
  as.numeric(car::recode(x, spec, as.factor = FALSE))
}

# descriptive LOC (53, 54*, 57*, 58, 61*, 62) *reverse-coded
wave2$descLOC_ownactions <- recode_agree7(wave2[, 53])
summary(wave2$descLOC_ownactions)

wave2$descLOC_conflict <- recode_agree7(wave2[, 54], reverse = TRUE)
summary(wave2$descLOC_conflict)

wave2$descLOC_chiefly <- recode_agree7(wave2[, 57], reverse = TRUE)
summary(wave2$descLOC_chiefly)

wave2$descLOC_protect <- recode_agree7(wave2[, 58])
summary(wave2$descLOC_protect)

wave2$descLOC_mostly <- recode_agree7(wave2[, 61], reverse = TRUE)
summary(wave2$descLOC_mostly)

wave2$descLOC_determine <- recode_agree7(wave2[, 62])
summary(wave2$descLOC_determine)

# Scale score = mean of the six items; a respondent missing any item gets NA
# (v. little missingness anyway).
wave2$descriptiveLOC <- rowMeans(wave2[, c("descLOC_ownactions", "descLOC_conflict", "descLOC_chiefly", "descLOC_protect", "descLOC_mostly", "descLOC_determine")])
# Not a huge amount of variation, but some. Extreme scorers are a worry
# (though, with reverse-coded items, always choosing "strongly disagree"
# can never yield exactly 1 or 7).
summary(wave2$descriptiveLOC)

library(psych)
descLOC <- wave2[, c("descLOC_ownactions", "descLOC_conflict", "descLOC_chiefly", "descLOC_protect", "descLOC_mostly", "descLOC_determine")]
alpha(descLOC) # .8

cor(wave2$descriptiveLOC, wave2$lf_race_scale, use = "pairwise.complete.obs")
# not big: -.07.

# descriptive connectedness (51, 52*, 55, 56*, 59, 60) *reverse-coded
wave2$descconn_allconnected <- recode_agree7(wave2[, 51])
summary(wave2$descconn_allconnected)

wave2$descconn_strangers <- recode_agree7(wave2[, 52], reverse = TRUE)
summary(wave2$descconn_strangers)

wave2$descconn_twopeople <- recode_agree7(wave2[, 55])
summary(wave2$descconn_twopeople)

wave2$descconn_little <- recode_agree7(wave2[, 56], reverse = TRUE)
summary(wave2$descconn_little)

wave2$descconn_interconnected <- recode_agree7(wave2[, 59])
summary(wave2$descconn_interconnected)

# NOTE(review): coded in the forward direction, matching the item list above
# (only 52 and 56 are starred). The variable name hints at a negatively
# worded item -- confirm against the questionnaire.
wave2$descconn_separation <- recode_agree7(wave2[, 60])
summary(wave2$descconn_separation)

wave2$descriptiveconn <- rowMeans(wave2[, c("descconn_allconnected", "descconn_strangers", "descconn_twopeople", "descconn_little", "descconn_interconnected", "descconn_separation")])
# Same caveats as for the descriptive LOC scale: limited variation, some
# extreme scorers.
summary(wave2$descriptiveconn)

descconn <- wave2[, c("descconn_allconnected", "descconn_strangers", "descconn_twopeople", "descconn_little", "descconn_interconnected", "descconn_separation")]
alpha(descconn) # .83

cor(wave2$descriptiveconn, wave2$lf_race_scale, use = "pairwise.complete.obs") # .27


# normative LOC, 73-75 (all three coded in the forward direction)
table(wave2[, 74])

wave2$normLOC_goitalone <- recode_agree7(wave2[, 73])
summary(wave2$normLOC_goitalone)
wave2$normLOC_admiration <- recode_agree7(wave2[, 74])
summary(wave2$normLOC_admiration)
wave2$normLOC_dependent <- recode_agree7(wave2[, 75])
summary(wave2$normLOC_dependent)

wave2$normativeLOC <- rowMeans(wave2[, c("normLOC_goitalone", "normLOC_admiration", "normLOC_dependent")]) # v. little missingness anyway

normLOC <- wave2[, c("normLOC_goitalone", "normLOC_admiration", "normLOC_dependent")]
alpha(normLOC) # .69

cor(wave2$normativeLOC, wave2$lf_race_scale, use = "pairwise.complete.obs") # basically nothing

# normative connectedness
# Six questions, each asked about three groups (suffix _1_, _2_, _3_), all on
# 5-point scales. The items are recoded to numeric here and averaged into
# scales further down.

# Recode one 5-point item to numeric 1-5 using a car::recode() specification.
# Labels missing from the specification end up NA after as.numeric().
# `as.factor = FALSE` is the car >= 3.0 argument name; with older car releases
# it partially matches the former `as.factor.result`. The previous spelling
# `as.factor.result` is rejected as an unused argument by current car.
recode_scale5 <- function(x, spec) {
  as.numeric(car::recode(x, spec, as.factor = FALSE))
}

# The three label sets used by the questions (they differ in the end points).
normcon_spec_close <- "'Not at all'=1; 'Just a little'=2; 'Somewhat'=3;'Quite a bit'=4;'Very'=5;"
normcon_spec_common <- "'Nothing'=1; 'Just a little'=2; 'Some'=3;'Quite a bit'=4;'Very much'=5;"
normcon_spec_much <- "'Not at all'=1; 'Just a little'=2; 'Somewhat'=3;'Quite a bit'=4;'Very much'=5;"

# 1. closeness
wave2$normcon_1_close <- recode_scale5(wave2[, 76], normcon_spec_close)
wave2$normcon_2_close <- recode_scale5(wave2[, 77], normcon_spec_close)
wave2$normcon_3_close <- recode_scale5(wave2[, 78], normcon_spec_close)
# 2. in common
wave2$normcon_1_common <- recode_scale5(wave2[, 79], normcon_spec_common)
wave2$normcon_2_common <- recode_scale5(wave2[, 80], normcon_spec_common)
wave2$normcon_3_common <- recode_scale5(wave2[, 81], normcon_spec_common)
# 3. family
wave2$normcon_1_family <- recode_scale5(wave2[, 82], normcon_spec_much)
wave2$normcon_2_family <- recode_scale5(wave2[, 83], normcon_spec_much)
wave2$normcon_3_family <- recode_scale5(wave2[, 84], normcon_spec_much)
# 4. feel a part of
wave2$normcon_1_part <- recode_scale5(wave2[, 85], normcon_spec_much)
wave2$normcon_2_part <- recode_scale5(wave2[, 86], normcon_spec_much)
wave2$normcon_3_part <- recode_scale5(wave2[, 87], normcon_spec_much)
# 5. upset when bad things happen
wave2$normcon_1_upset <- recode_scale5(wave2[, 88], normcon_spec_much)
wave2$normcon_2_upset <- recode_scale5(wave2[, 89], normcon_spec_much)
wave2$normcon_3_upset <- recode_scale5(wave2[, 90], normcon_spec_much)
# 6. want to help
wave2$normcon_1_help <- recode_scale5(wave2[, 91], normcon_spec_much)
wave2$normcon_2_help <- recode_scale5(wave2[, 92], normcon_spec_much)
wave2$normcon_3_help <- recode_scale5(wave2[, 93], normcon_spec_much)

library(psych)

# Item sets: group 1 feeds the "community" scale, group 2 "americans",
# group 3 "world"; normcon_all pools all 18 items.
normcon1 <- wave2[, c("normcon_1_close", "normcon_1_common", "normcon_1_family", "normcon_1_part", "normcon_1_upset", "normcon_1_help")]
normcon2 <- wave2[, c("normcon_2_close", "normcon_2_common", "normcon_2_family", "normcon_2_part", "normcon_2_upset", "normcon_2_help")]
normcon3 <- wave2[, c("normcon_3_close", "normcon_3_common", "normcon_3_family", "normcon_3_part", "normcon_3_upset", "normcon_3_help")]
normcon_all <- wave2[, c(names(normcon1), names(normcon2), names(normcon3))]

# Scale scores are plain row means, so a respondent missing any item gets NA.
wave2$normcon_community <- rowMeans(normcon1)
wave2$normcon_americans <- rowMeans(normcon2)
wave2$normcon_world <- rowMeans(normcon3)
wave2$normcon_all <- rowMeans(normcon_all)

summary(wave2$normcon_community)
summary(wave2$normcon_americans)
summary(wave2$normcon_world)
summary(wave2$normcon_all)

# Internal consistency of each item set.
alpha(normcon1) # .91
alpha(normcon2) # .89
alpha(normcon3) # .88
alpha(normcon_all) # .94

# How correlated are the 3 separate scales with racial linked fate?
cor(wave2$normcon_community, wave2$lf_race_scale, use = "pairwise.complete.obs")
cor(wave2$normcon_americans, wave2$lf_race_scale, use = "pairwise.complete.obs")
cor(wave2$normcon_world, wave2$lf_race_scale, use = "pairwise.complete.obs")
# a little.
cor(wave2$normcon_all, wave2$lf_race_scale, use = "pairwise.complete.obs")

# What about the other scale we talked about: the idea that you don't see
# geographically-close people as any more important.
# Local excess = community score minus world score.
wave2$normcon_localexcess <- wave2$normcon_community - wave2$normcon_world
summary(wave2$normcon_localexcess)
cor(wave2$normcon_localexcess, wave2$lf_race_scale, use = "pairwise.complete.obs")

# Each individual's highest group score minus their lowest. pmax()/pmin() work
# row-wise over the three vectors and, like max()/min(), give NA when any of
# the three scores is NA. as.numeric() drops any names carried by the scores.
wave2$normcon_minmax <- as.numeric(
  pmax(wave2$normcon_community, wave2$normcon_americans, wave2$normcon_world) -
    pmin(wave2$normcon_community, wave2$normcon_americans, wave2$normcon_world)
)
cor(wave2$normcon_minmax, wave2$lf_race_scale, use = "pairwise.complete.obs")

# Per-respondent variance across the given columns of wave2.
normcon_row_var <- function(cols) {
  apply(wave2[, cols], 1, function(x) var(as.numeric(x)))
}

# Variance of the three scale scores within each respondent.
wave2$normcon_indvar <- normcon_row_var(c("normcon_community", "normcon_americans", "normcon_world"))
cor(wave2$normcon_indvar, wave2$lf_race_scale, use = "pairwise.complete.obs")
# nothing -- not surprising, given how noisy this is going to be.

# A measure that uses all six questions: within each question take the
# variance of the three group answers, then average those six variances.
# Yes, this is a lot of tests -- just trying to be sure this concept has been
# given a fair shake before calling it a null.
wave2$v1 <- normcon_row_var(c("normcon_1_close", "normcon_2_close", "normcon_3_close"))
wave2$v2 <- normcon_row_var(c("normcon_1_common", "normcon_2_common", "normcon_3_common"))
wave2$v3 <- normcon_row_var(c("normcon_1_family", "normcon_2_family", "normcon_3_family"))
wave2$v4 <- normcon_row_var(c("normcon_1_part", "normcon_2_part", "normcon_3_part"))
wave2$v5 <- normcon_row_var(c("normcon_1_upset", "normcon_2_upset", "normcon_3_upset"))
wave2$v6 <- normcon_row_var(c("normcon_1_help", "normcon_2_help", "normcon_3_help"))
wave2$normcon_avgvar <- rowMeans(wave2[, c("v1", "v2", "v3", "v4", "v5", "v6")])
cor(wave2$normcon_avgvar, wave2$lf_race_scale, use = "pairwise.complete.obs")

# Summarise the column that was just built. The earlier `wave2$avgvar` named a
# column that does not exist, so it summarised NULL.
summary(wave2$normcon_avgvar)

# Table of correlations between each of the 4 linked-fate (LF) scales and the
# other measures: one row per measure, one column per LF scale.
other_measures <- c("descriptiveLOC","normativeLOC","descriptiveconn","normcon_community","normcon_americans","normcon_world","normcon_localexcess","normcon_indvar","normcon_avgvar")
storage <- as.data.frame(matrix(nrow = length(other_measures), ncol = 5))
colnames(storage) <- c("measure", "LF_race", "LF_class", "LF_gender", "LF_religion")
storage$measure <- other_measures

# Correlation of one LF scale (a column name in wave2) with every measure
# listed in storage$measure, using pairwise-complete observations.
lf_correlations <- function(lf_column) {
  vapply(
    storage$measure,
    function(m) cor(wave2[[lf_column]], wave2[, m], use = "pairwise.complete.obs"),
    numeric(1),
    USE.NAMES = FALSE
  )
}

storage$LF_race <- lf_correlations("lf_race_scale")

# For the other LF scales, note the smaller sample sizes.
# NOTE(review): gender/class/religion are not defined in this section --
# presumably subset data frames created earlier in the file; confirm.
nrow(gender); nrow(class); nrow(religion);

storage$LF_gender <- lf_correlations("lf_gender_scale")
storage$LF_religion <- lf_correlations("lf_religion_scale")
storage$LF_class <- lf_correlations("lf_class_scale")

# Show the first six measures, then export LaTeX tables: the full correlation
# table and a short version with readable labels for those six rows.
storage[seq_len(6), ]
library(xtable)

Measure <- c(
  "Locus of Control - Descriptive",
  "Locus of Control - Normative",
  "Connectedness - Descriptive",
  "Connectedness - Normative (`Local Community')",
  "Connectedness - Normative (`Americans')",
  "Connectedness - Normative (`World')"
)
# Drop the raw `measure` column (column 1) and put the labels in its place.
storageprint <- cbind(Measure, storage[seq_len(6), -1])

print(xtable(storage), file = "correlationmatrix.tex")
print(xtable(storageprint), file = "correlationmatrix_short.tex")


######################################
# output some results
######################################

# Quick check: gender breakdown within each race category.
table(wave2$race, wave2$gender)

# Distribution of the racial LF scale, overall and by race.
summary(wave2$lf_race_scale)
table(wave2$lf_race_scale, wave2$race)

# Mean of each LF scale (missing values ignored), in the order
# race, class, gender, religion.
lf_scale_cols <- c("lf_race_scale", "lf_class_scale", "lf_gender_scale", "lf_religion_scale")
scalemeans <- vapply(
  lf_scale_cols,
  function(col) mean(wave2[[col]], na.rm = TRUE),
  numeric(1),
  USE.NAMES = FALSE
)

# Bar chart of the four scale means.
pdf("LF_levels.pdf")
barplot(
  scalemeans,
  main = "Mean Level of Linked Fate",
  ylim = c(0, 3),
  names.arg = c("Race", "Class", "Gender", "Religion"),
  ylab = "Mean respondent level of linked fate"
)
dev.off()


# Mean racial LF within each race category, plotted as a bar chart.
library(doBy)
raceby <- summaryBy(lf_race_scale ~ race, data = wave2, FUN = mean)

# Only the first four groups are plotted; the labels below are hard-coded and
# rely on the row order of `raceby`.
first_four_means <- raceby$lf_race_scale.mean[1:4]

pdf("lfrace_byrace.pdf")
barplot(
  first_four_means,
  main = "Racial Linked Fate (by Race), MTurk sample",
  names.arg = c("Asian", "Black", "Hispanic", "White"),
  ylim = c(0, 3),
  ylab = "Mean respondent level of linked fate"
)
dev.off()


# Is racial LF different for Black respondents than for everyone else?
# `black` is 1 when race is exactly "Black, Non-Hispanic", 0 otherwise.
wave2$black <- as.numeric(wave2$race == "Black, Non-Hispanic")

t.test(
  wave2[wave2$black == 1, "lf_race_scale"],
  wave2[wave2$black == 0, "lf_race_scale"]
)
#black racial LF is higher than others in this sample.

# Related question: are Black respondents more likely than others to say yes
# to the first racial LF question?
t.test(
  wave2[wave2$black == 1, "lf_race1"],
  wave2[wave2$black == 0, "lf_race1"]
)
#yes.

# Whites vs. nonwhites: `white` is 1 when race is exactly
# "White, Non-Hispanic", 0 otherwise. The test (white first, nonwhite second)
# is stored for the differences plot later in the file.
wave2$white <- as.numeric(wave2$race == "White, Non-Hispanic")
racestatus <- t.test(
  wave2[wave2$white == 1, "lf_race_scale"],
  wave2[wave2$white == 0, "lf_race_scale"]
)


# Gender LF by gender: group means (NAs dropped), then a bar chart.
genderby <- summaryBy(lf_gender_scale ~ gender, data = wave2, FUN = mean, na.rm = TRUE)

# Print the group order so the hard-coded bar labels below can be checked.
c(genderby$gender)
pdf("lfgender_bygender.pdf")
barplot(
  genderby$lf_gender_scale.mean,
  main = "Gender Linked Fate (by Gender), MTurk sample",
  names.arg = c("Women", "Men"),
  ylim = c(0, 3)
)
dev.off()

# Is women's gender LF different from men's?
# `woman` is 1 when gender is exactly "Female", 0 for every other value.
wave2$woman <- as.numeric(wave2$gender == "Female")

# First sample is woman == 0, second is woman == 1.
genderstatus <- t.test(
  wave2[wave2$woman == 0, "lf_gender_scale"],
  wave2[wave2$woman == 1, "lf_gender_scale"]
)
#yup, different.

# Class LF by self-reported class: group means (NAs dropped), then a bar chart.
classby <- summaryBy(lf_class_scale ~ class, data = wave2, FUN = mean, na.rm = TRUE)

# Print the group order so the hard-coded bar labels below can be checked.
c(classby$class)
pdf("lf_byclass.pdf")
barplot(
  classby$lf_class_scale.mean,
  main = "Class Linked Fate (by Class), MTurk sample",
  names.arg = c("Lower", "Middle", "Upper", "Working"),
  ylim = c(0, 3)
)
dev.off()
#but note how few people are in some of these cells, esp. upper.

# `lowerworking` is 1 for "the lower class" or "the working class", else 0.
wave2$lowerworking <- as.numeric(
  wave2$class %in% c("the lower class", "the working class")
)
table(wave2$lowerworking) #nearly all the rest are "middle".

# First sample is lowerworking == 0, second is lowerworking == 1.
classstatus <- t.test(
  wave2[wave2$lowerworking == 0, "lf_class_scale"],
  wave2[wave2$lowerworking == 1, "lf_class_scale"]
) #no difference.

# Religious LF: split respondents into Christian vs. not.
# `christian` is 1 for the denominations listed here, 0 for any other answer,
# and NA when the religion answer is blank.
christian_groups <- c(
  "Baptist—any denomination",
  "Catholic",
  "Eastern Orthodox",
  "Mormon",
  "Other Christian",
  "Pentecostal",
  "Protestant (e.g., Methodist, Lutheran, Presbyterian, Episcopal)"
)
wave2$christian <- as.numeric(wave2$religion %in% christian_groups)
wave2[wave2$religion == "", "christian"] <- NA
table(wave2$christian)


# First pass: the non-Christian group still includes non-religious people.
t.test(
  wave2[wave2$christian==1, "lf_religion_scale"],
  wave2[wave2$christian==0, "lf_religion_scale"]
) #not quite sig (p=.06) in the other direction, Christians showing higher LF, though should note this includes non-religious people.

# Second pass: drop respondents answering "None" and re-run; this is the
# version stored for the differences plot (Christian first, non-Christian second).
wave2[wave2$religion == "None", "christian"] <- NA
religionstatus <- t.test(
  wave2[wave2$christian==1, "lf_religion_scale"],
  wave2[wave2$christian==0, "lf_religion_scale"]
)


# Four-panel figure: mean of each LF scale within each race category.
# Might want to include n in the title of each panel, or else pull it to note in text.

raceby <- summaryBy(lf_race_scale ~ race, data = wave2, FUN = mean)

# For the other three scales, keep only respondents with a non-missing score;
# the count of such respondents is printed after each summary.
answered_gender <- !is.na(wave2$lf_gender_scale)
genderby <- summaryBy(lf_gender_scale ~ race, data = wave2[answered_gender, ], FUN = mean)
sum(answered_gender)

answered_class <- !is.na(wave2$lf_class_scale)
classby <- summaryBy(lf_class_scale ~ race, data = wave2[answered_class, ], FUN = mean)
sum(answered_class)

answered_religion <- !is.na(wave2$lf_religion_scale)
religionby <- summaryBy(lf_religion_scale ~ race, data = wave2[answered_religion, ], FUN = mean)
sum(answered_religion)

colors <- gray(c(.2, .4, .6, .8))

# Draw one panel; every panel shares the same race labels, y-range, y-label
# and bar shading, and differs only in bar heights and title.
draw_race_panel <- function(heights, title) {
  invisible(barplot(
    heights,
    main = title,
    names.arg = c("Asian", "Black", "Hispanic", "White"),
    ylim = c(0, 3),
    ylab = "Mean respondent level of linked fate",
    col = colors
  ))
}

pdf(file = "LFbyrace_fig1rep.pdf")
par(mfrow = c(2, 2))
draw_race_panel(raceby$lf_race_scale.mean, "Racial Linked Fate (by Race), \n MTurk sample (n=449)")
draw_race_panel(classby$lf_class_scale.mean, "Class Linked Fate (by Race), \n MTurk sample (n=108)")
draw_race_panel(genderby$lf_gender_scale.mean, "Gender Linked Fate (by Race), \n MTurk sample (n=93)")
draw_race_panel(religionby$lf_religion_scale.mean, "Religious Linked Fate (by Race), \n MTurk sample (n=74)")
dev.off()

## Replicate fig2 from the paper (sociological differences plot).
racestatus

# The four stored t-tests, in plotting order (top to bottom):
# class, gender, religion, race.
status_tests <- list(classstatus, genderstatus, religionstatus, racestatus)

# Difference of the two group means (first sample minus second sample).
mean_gap <- function(tt) tt$estimate[1] - tt$estimate[2]
diffs <- do.call(c, lapply(status_tests, mean_gap))

# Lower and upper confidence bounds reported by each test.
CIlow <- vapply(status_tests, function(tt) tt$conf.int[1], numeric(1))
CIhigh <- vapply(status_tests, function(tt) tt$conf.int[2], numeric(1))
diffs1 <- cbind(diffs, CIlow, CIhigh)

## Plot setup: `ruler` gives the y position of each comparison.
ruler <- seq(1.5, 0, by = -.5)
diffs2 <- cbind(diffs1, ruler)
diffsall <- diffs2[]
labels <- c("Upper - Lower class", "Male - Female", "Christian - NonChristian", "White - Nonwhite")

## PLOT
pdf("sociologicaldiffs_june2014_Mturk.pdf")
#op <- par(mar = c(5,8.5,4,2) + 0.1) ## default is c(5,4,4,2) + 0.1
par(mai = c(.35, .15, .275, .15), oma = c(3.4, 8.45, 1.55, .2))

# Empty canvas with no y axis; the comparison labels are added separately.
plot(
  NULL, NULL,
  main = "Linked Fate in Privileged and Non-privileged Groups (MTurk Sample)",
  xlim = c(-1.5, 1.5), ylim = c(-.25, 1.75),
  xlab = "Difference of means", yaxt = 'n', ylab = "",
  cex.axis = .75, cex.main = .95
)
axis(2, at = ruler, labels = labels, las = 1, tick = FALSE, cex.axis = .95) # y labels
abline(h = c(.5, 1, 1.5), lty = 3, col = "lightgray") # faint guides across the plot
abline(v = 0)

# Point estimates, with horizontal segments spanning CIlow to CIhigh.
points(diffsall[1:4, "diffs"], ruler, pch = 19)
segments(diffsall[1:4, "CIlow"], ruler, diffsall[1:4, "CIhigh"], ruler)

dev.off()

####################################################################################
# Brief look at demographics -- only the ones relevant to our LF questions
# were asked (race, gender, religion, class).
# No actual income question, age, or education.

demog_cols <- c("race", "gender", "class", "religion", "party7", "christian")
demogs <- wave2[, demog_cols]

# One-way frequency tables.
table(demogs$race)
table(demogs$gender)
table(demogs$class)
table(demogs$religion)
table(demogs$party7)

# Totals of the first three and of the fourth-to-sixth party7 categories.
# NOTE(review): which party7 values land in positions 1:3 and 4:6 depends on
# how table() orders that column -- confirm against the table printed above.
party_counts <- table(demogs$party7)
sum(party_counts[1:3]); sum(party_counts[4:6]) #very liberal
table(demogs$christian)

# Cross-tabs: gender x race and class x race.

table(demogs$race, demogs$gender)
table(demogs$race, demogs$class)

#gender imbalanced-- not surprising from convenience sample.



